home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Experimental BBS Explossion 3
/
Experimental BBS Explossion III.iso
/
graphics
/
flick_12.zip
/
CHUNKY8.S
< prev
next >
Wrap
Text File
|
1994-02-18
|
9KB
|
451 lines
;
; Flick FLI-format Animation Viewer v1.2 19 Feb 1994
; --------------------------------------
;
;
;This program plays FLI/FLC-format bitmapped animation files on any ECS
;or AGA Amiga running OS2.04 or higher. FLI/FLC-format files are
;produced by Autodesk Animator and Autodesk 3D Studio on a PC, as well
;as by other programs.
;
;The files in this archive may be distributed anywhere provided they are
;unmodified and are not sold for profit.
;
;Ownership and copyright of all files remains with the author:
;
; Peter McGavin, 86 Totara Crescent, Lower Hutt, New Zealand.
; e-mail: peterm@maths.grace.cri.nz
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; xdef _chunky2planar
; Basically the same as peterm/chunky6.s, except that the loops are unrolled
; as much as possible. This is much better optimised for a 68040,
; but inefficient on lower processors.
;-----------------------------------------------------------------------------
; Set Macro68 defaults
; NOTE(review): these "default" directives are specific to the Macro68
; assembler. Judging by their names they select word-sized branches and
; displacements and PC-relative encoding for absolute references -- confirm
; against the Macro68 manual before porting this file to another assembler.
default _branch,_word
default _adrbasedisp,_word
default _pcbasedisp,_word
default _outerdisp,_word
default _absolute,_pcrel
;-----------------------------------------------------------------------------
; chunky2planar: (new Motorola syntax)
; a0 -> chunky pixels
; a1 -> plane0 (assume other 7 planes are allocated contiguously)
; d1 = size of one bitplane in bytes, i.e. width*height/8 (if generic is defined)
; plsiz = size of one bitplane in bytes (8 pixels per byte). It is only
; defined for the fixed-size build; the generic build receives the plane size
; in d1 at run time instead. width and height are not defined in this file
; and are expected to be supplied by whatever assembles/includes it.
ifnd generic
plsiz equ width*height/8
endc
; Library vector offset of exec.library CacheClearU(), used after the main
; loop has been copied to its aligned position.
_LVOCacheClearU equ -636
; Entry point. The exported label encodes the configuration this file was
; assembled for:
;   generic build : _c2p_<depth>_040          depth = 4, 6 or 8
;   fixed build   : _c2p320x200x<depth>_040   width = 320, depth = 6 or 8
; Every other combination stops the assembly with an error, so an
; unsupported configuration can never produce a routine that has no entry
; label and is therefore unreachable.
        ifd     generic
          ifeq    depth-8
_c2p_8_040::
          else
            ifeq    depth-6
_c2p_6_040::
            else
              ifeq    depth-4
_c2p_4_040::
              else
                die     "Unrecognised depth"
              endc
            endc
          endc
        else
          ifeq    depth-8
            ifeq    width-320
_c2p320x200x8_040::
            else
              die     "Unrecognised resolution"
            endc
          else
            ifeq    depth-6
              ifeq    width-320
_c2p320x200x6_040::
              else
                die     "Unrecognised resolution"
              endc
            else
              die     "Unrecognised resolution"
            endc
          endc
        endc
; ---------------------------------------------------------------------------
; Entry code.
;   1. Save d2-d7/a2-a6 (restored by the matching movem before rts).
;   2. On the very first call only, copy the main loop down to a 16-byte
;      boundary and flush the CPU caches.
;   3. Reserve a 16-byte-aligned, 64-byte staging buffer on the stack.
;   4. Push the loop variables and load the mask constants / buffer pointers.
; In:  a0 -> chunky pixels, a1 -> plane 0, d1 = plane size (generic build)
; ---------------------------------------------------------------------------
movem.l d2-d7/a2-a6,-(sp)
; bset reports the OLD state of bit 0 in Z and then sets it, so the branch
; below is taken on every call except the first.
bset #0,(firsttimeflag)
bne.b skip_relocate ; branch if not being called 1st time
; relocate the mainloop to a quad-longword boundary (for 030/040 cache line)
; a2 = begincode rounded up to the next multiple of 16. The 16 bytes of nops
; at begincode guarantee that this address is below mainloop, so the forward
; word copy below moves the code down by 2..16 bytes without overwriting
; source words that have not been read yet.
lea (begincode,pc),a2
adda.w #15,a2
move.l a2,d0
and.w #~15,d0 ; clear the low 4 address bits
movea.l d0,a2
lea (mainloop,pc),a3
move.w #(endcode-mainloop)/2-1,d0 ; word count minus 1 for dbra
1$: move.w (a3)+,(a2)+
dbra d0,1$
; flush the caches
; The code was just written as data, so the caches must be cleared before it
; is executed. a0/a1/d1 carry this routine's inputs and are saved around the
; library call; a6 = library base read from absolute address 4.
movem.l a0/a1/d1,-(sp)
movea.l (4).w,a6
jsr (_LVOCacheClearU,a6)
movem.l (sp)+,a0/a1/d1
; d0 = (sp & 15) + 64: subtracting it leaves sp on a 16-byte boundary with at
; least 64 bytes of room above it.
skip_relocate: move.w sp,d0
and.w #15,d0
add.w #64,d0 ; make room on stack for
suba.w d0,sp ; 64-byte quad-longword aligned buffer
movea.l sp,a3 ; pointed to by a3
move.w d0,-(sp) ; and save the allocated size
; Stack layout once the pushes below are done:
;   generic build : (sp).w  = outer loop counter
;                   (2,sp).l = 7*plsiz (depth > 4) or 3*plsiz
;                   (6,sp).l = plsiz
;                   (10,sp).w = size of the buffer allocation
;   fixed build   : (sp).w  = outer loop counter
;                   (2,sp).w = size of the buffer allocation
; The loop counter is plsiz/4 - 1: one count per longword written to each
; plane, minus the first 32 pixels which are converted before the loop.
ifd generic
move.l d1,-(sp) ; plsiz on stack at (6,sp)
move.l d1,d0
ifgt depth-4
lsl.l #3,d0
else
lsl.l #2,d0
endc
sub.l d1,d0 ; d0 = 8*plsiz - plsiz or 4*plsiz - plsiz
move.l d0,-(sp) ; 7*plsiz or 3*plsiz on stack at (2,sp)
lsr.l #2,d1
subq.l #1,d1
move.w d1,-(sp) ; outer loop counter on stack at (sp)
else
move.w #plsiz/4-1,-(sp) ; outer loop counter on stack at (sp)
endc
; set up register constants
move.l #$0f0f0f0f,d5 ; d5 = constant $0f0f0f0f
move.l #$55555555,d6 ; d6 = constant $55555555
move.l #$3333cccc,d7 ; d7 = constant $3333cccc
; load up address registers with buffer ptrs
; Each pointer owns 16 bytes of the staging buffer: bytes 0-7 hold the
; even-numbered plane, bytes 8-15 the odd-numbered one (a3 itself points at
; the plane 0/1 area at the start of the buffer).
lea (4*4,a3),a4 ; a4 -> plane2buf
ifgt depth-4
lea (4*4,a4),a5 ; a5 -> plane4buf
lea (4*4,a5),a6 ; a6 -> plane6buf
endc
; Macros part1 and part2 together convert 8 pixels from chunky to stack buffers
;
; part1: fetch 8 chunky pixels and perform the nibble-level and 2-bit-level
; merging steps.
; In:   a0 -> next 8 chunky pixels (advanced by 8)
;       d5 = $0f0f0f0f, d6 = $55555555, d7 = $3333cccc
; Out:  d3 = merged low-nibble data, consumed by part2 for planes 0-3
;       depth > 4 only: d1 = merged high-nibble data, with d0 = d1 & d6 and
;       d2 = d1 >> 7 already prepared for part2's last merging step
; Uses: d2, d3 (plus d0, d1 when depth > 4)
; The number at the start of each instruction comment is the original
; author's timing annotation (presumably a cycle count -- TODO confirm for
; which CPU).
; NOTE(review): when depth <= 4 the high nibbles are never masked off before
; the "lsl.l #4,d2", so the chunky bytes presumably must already be < 16 --
; verify against the caller.
part1 macro
move.l (a0)+,d2 ; 12 get next 4 chunky pixels in d2
move.l (a0)+,d3 ; 12 get next 4 chunky pixels in d3
ifgt depth-4
; Split every pixel into nibbles: d2/d3 keep the low nibbles, d0/d1 receive
; the high nibbles (pixel XOR its own low nibble).
move.l d2,d0 ; 4
and.l d5,d2 ; 8 d5=$0f0f0f0f
move.l d3,d1 ; 4
and.l d5,d3 ; 8 d5=$0f0f0f0f
eor.l d2,d0 ; 8
eor.l d3,d1 ; 8
; d0 = per byte: high nibble of pixel i (upper half), high nibble of
; pixel i+4 (lower half)
lsr.l #4,d1 ; 16
or.l d1,d0 ; 8
endc
; d2 = per byte: low nibble of pixel i (upper half), low nibble of
; pixel i+4 (lower half)
lsl.l #4,d2 ; 16
or.l d3,d2 ; 8
; Exchange 2-bit groups between the upper and lower word: the bits selected
; by d7 are taken out, shifted by two within their word, moved to the other
; word by the swap, and merged back with the bits that stayed in place.
move.l d2,d3 ; 4
and.l d7,d3 ; 8 d7=$3333cccc
eor.l d3,d2 ; 8
lsr.w #2,d3 ; 10
swap d3 ; 4
lsl.w #2,d3 ; 10
or.l d2,d3 ; 8
ifgt depth-4
; Same 2-bit exchange for the high-nibble data in d0, result in d1.
move.l d0,d1 ; 4
and.l d7,d1 ; 8 d7=$3333cccc
eor.l d1,d0 ; 8
lsr.w #2,d1 ; 10
swap d1 ; 4
lsl.w #2,d1 ; 10
or.l d0,d1 ; 8
; First operations of the final 1-bit merge, placed here rather than in
; part2.
move.l d1,d2 ; 4
lsr.l #7,d2 ; 22
move.l d1,d0 ; 4
and.l d6,d0 ; 8 d6=$55555555
endc
endm
; part2: final 1-bit merge and store of one byte (8 pixels) per bitplane into
; the stack staging buffers.
; In:   d3 as left by part1 (and d0, d1, d2 when depth > 4)
;       d6 = $55555555
;       a3 / a4 / a5 / a6 -> current byte position in the staging areas for
;       planes 0+1 / 2+3 / 4+5 / 6+7
; Out:  even-numbered planes are stored through (aN)+, odd-numbered planes at
;       (8,aN); every pointer that is used ends up advanced by one byte
; Uses: d0, d2, d3, d4 (plus d1 when depth > 4)
; The number at the start of each instruction comment is the original
; author's timing annotation (presumably a cycle count -- TODO confirm).
part2 macro
ifgt depth-4
; Planes 4-7 from the high-nibble data: d0 = d1 & d6 and d2 = d1 >> 7 were
; prepared at the end of part1.
eor.l d0,d1 ; 8
move.l d2,d4 ; 4
and.l d6,d4 ; 8 d6=$55555555
eor.l d4,d2 ; 8
or.l d4,d1 ; 8
lsr.l #1,d1 ; 10
move.b d1,(8,a5) ; 12 plane 5
ifgt depth-6
swap d1 ; 4
move.b d1,(8,a6) ; 12 plane 7
endc
or.l d0,d2 ; 8
move.b d2,(a5)+ ; 8 plane 4
ifgt depth-6
swap d2 ; 4
move.b d2,(a6)+ ; 8 plane 6
endc
endc
; Planes 0-3 from the low-nibble data in d3: the same merge as above, with
; the ">> 7" / "& d6" preparation done here.
move.l d3,d2 ; 4
lsr.l #7,d2 ; 22
move.l d3,d0 ; 4
and.l d6,d0 ; 8 d6=$55555555
eor.l d0,d3 ; 8
move.l d2,d4 ; 4
and.l d6,d4 ; 8 d6=$55555555
eor.l d4,d2 ; 8
or.l d4,d3 ; 8
lsr.l #1,d3 ; 10
move.b d3,(8,a3) ; 12 plane 1
swap d3 ; 4
move.b d3,(8,a4) ; 12 plane 3
or.l d0,d2 ; 8
move.b d2,(a3)+ ; 8 plane 0
swap d2 ; 4
move.b d2,(a4)+ ; 8 plane 2
endm
; subao: move a1 back by one bitplane (a1 -= plane size), picking the
; shortest operand form the build configuration allows.
subao   macro
        ifnd    generic
          ifge    plsiz-32768
        suba.l  #plsiz,a1       ; plane size needs a full longword immediate
          else
        suba.w  #plsiz,a1       ; plane size fits a sign-extended word
          endc
        else
        suba.l  (6,sp),a1       ; run-time plane size kept on the stack
        endc
        endm
; Prime the staging buffers: the first 32 pixels (4 groups of 8) are
; converted before the main loop starts, so that the loop always has one
; finished longword per plane to send out while it converts the next one.
        part1
        part2
        part1
        part2
        part1
        part2
        part1
        part2
; Sixteen bytes of padding (8 nops). This is the slack into which the entry
; code slides the main loop when it aligns it to a 16-byte boundary.
begincode:
        rept    8
        nop
        endr
; main loop (starts here) processes 64 chunky pixels at a time
;
; The loop body consists of two halves of 32 pixels each. Each half sends the
; longword (32 pixels) converted previously from the stack staging buffers to
; the planes, highest plane first, while part1/part2 convert the next 32
; pixels into the other longword slot of each staging area.
;
; Pointer invariant: on entry a3-a6 point 4 bytes into their 16-byte staging
; areas, with the longword to send at (-4,aN) for the even plane and (4,aN)
; for the odd plane. Every part2 advances the pointers by one byte, which is
; why the displacements of successive reads shrink by one (4, -4, 3, -5, ...).
; The first half fills bytes 4-7 / 12-15 and leaves the pointers at +8; they
; are then wound back by 8 and the second half sends that data while
; refilling bytes 0-3 / 8-11, ending with the pointers at +4 again.
;
; a1 walks from plane 0 up to the highest plane (adda) and back down one
; plane at a time (subao); the final plane 0 write post-increments a1 to the
; next longword. The word at (sp) is the outer loop counter, decremented once
; per half.
mainloop:
; Process the next 32 pixels from chunky to stack buffers while at the same
; time moving the result of the previous 32 pixels from stack buffers to
; Chip ram planes.
; Chip writes are spaced as widely apart as possible, so that there is
; always something useful happening while waiting for the Chip bus.
ifgt depth-4
ifd generic
adda.l (2,sp),a1 ; add 7*plsiz
else
adda.l #7*plsiz,a1 ; a1: plane 0 -> plane 7 position
endc
ifgt depth-6
move.l (4,a6),(a1) ; plane 7
endc
part1
subao
ifgt depth-6
move.l (-4,a6),(a1) ; plane 6
endc
part2
subao
move.l (3,a5),(a1) ; plane 5
part1
subao
move.l (-5,a5),(a1) ; plane 4
part2
subao
move.l (2,a4),(a1) ; plane 3
part1
subao
move.l (-6,a4),(a1) ; plane 2
part2
subao
move.l (1,a3),(a1) ; plane 1
part1
subao
move.l (-7,a3),(a1)+ ; plane 0
part2
else
ifd generic
adda.l (2,sp),a1 ; add 3*plsiz
else
adda.l #3*plsiz,a1 ; a1: plane 0 -> plane 3 position
endc
move.l (4,a4),(a1) ; plane 3
part1
part2
subao
move.l (-5,a4),(a1) ; plane 2
part1
part2
subao
move.l (2,a3),(a1) ; plane 1
part1
part2
subao
move.l (-7,a3),(a1)+ ; plane 0
part1
part2
endc
; check if finished
; (sp) = outer loop counter; leaving here, the staging pointers are at +8
sub.w #1,(sp)
beq.w done
; restore stack buffer pointers
; The four part2 invocations above advanced each pointer from +4 to +8; wind
; them back to the start of their staging areas.
subq.l #8,a3
subq.l #8,a4
ifgt depth-4
subq.l #8,a5
subq.l #8,a6
endc
; Process the next 32 pixels from chunky to stack buffers while at the same
; time moving the result of the previous 32 pixels from stack buffers to
; Chip ram planes.
; The data to send now sits at bytes 4-7 (even plane) and 12-15 (odd plane)
; of each staging area, hence the larger displacements in this half.
ifgt depth-4
ifd generic
adda.l (2,sp),a1 ; add 7*plsiz
else
adda.l #7*plsiz,a1 ; a1: plane 0 -> plane 7 position
endc
ifgt depth-6
move.l (12,a6),(a1) ; plane 7
endc
part1
subao
ifgt depth-6
move.l (4,a6),(a1) ; plane 6
endc
part2
subao
move.l (11,a5),(a1) ; plane 5
part1
subao
move.l (3,a5),(a1) ; plane 4
part2
subao
move.l (10,a4),(a1) ; plane 3
part1
subao
move.l (2,a4),(a1) ; plane 2
part2
subao
move.l (9,a3),(a1) ; plane 1
part1
subao
move.l (1,a3),(a1)+ ; plane 0
part2
else
ifd generic
adda.l (2,sp),a1 ; add 3*plsiz
else
adda.l #3*plsiz,a1 ; a1: plane 0 -> plane 3 position
endc
move.l (12,a4),(a1) ; plane 3
part1
part2
subao
move.l (3,a4),(a1) ; plane 2
part1
part2
subao
move.l (10,a3),(a1) ; plane 1
part1
part2
subao
move.l (1,a3),(a1)+ ; plane 0
part1
part2
endc
; check if finished, go back for more
; Here the staging pointers are back at +4, matching the loop entry state.
sub.w #1,(sp)
bne.w mainloop
; write the last longword from stack buffer to planes
;
; done is reached either through the "beq.w done" in the middle of the main
; loop or by falling out of the bottom of the loop. In both cases a3-a6
; already point 4 bytes past the start of the longword that was converted
; last, so the even plane's data is at (-4,aN) and the odd plane's data at
; (4,aN):
;   mid-loop exit : pointers at buffer+8, data at buffer+4 / buffer+12
;   bottom exit   : pointers at buffer+4, data at buffer+0 / buffer+8
; No pointer adjustment is therefore applied on the fall-through path;
; advancing the pointers by 4 there would send the stale longword of the
; previous pass again instead of the final 32 pixels.
done:
        ifd     generic
        adda.l  (2,sp),a1       ; add 7*plsiz or 3*plsiz
          ifgt    depth-4
            ifgt    depth-6
        move.l  (4,a6),(a1)     ; plane 7
            endc
        subao
            ifgt    depth-6
        move.l  (-4,a6),(a1)    ; plane 6
            endc
        subao
          endc
        else
        adda.l  #(depth-1)*plsiz,a1 ; a1 points into the highest plane
          ifgt    depth-6
        move.l  (4,a6),(a1)     ; plane 7
        subao
        move.l  (-4,a6),(a1)    ; plane 6
        subao
          endc
        endc
        ifgt    depth-4
        move.l  (4,a5),(a1)     ; plane 5
        subao
        move.l  (-4,a5),(a1)    ; plane 4
        subao
        endc
        move.l  (4,a4),(a1)     ; plane 3
        subao
        move.l  (-4,a4),(a1)    ; plane 2
        subao
        move.l  (4,a3),(a1)     ; plane 1
        subao
        move.l  (-4,a3),(a1)+   ; plane 0
; all done! restore stack and return
        ifd     generic
        adda.w  #4+4+2,sp       ; remove plsiz, 7*/3*plsiz and loop counter
        else
        addq.w  #2,sp           ; remove outer loop counter
        endc
        adda.w  (sp)+,sp        ; pop the saved size and release the aligned
                                ; 64-byte staging buffer (plus alignment pad)
        movem.l (sp)+,d2-d7/a2-a6
        rts
; endcode marks the end of the range (mainloop..endcode) that the entry code
; copies when it aligns the main loop.
endcode:
; Bit 0 is set by the entry code on the first call; while it is clear the
; main loop has not been relocated yet. The flag lies outside the copied
; range, so it stays at this address.
firsttimeflag: dc.b 0
even
;-----------------------------------------------------------------------------
end